<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Dealing with Human Language | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="prev" href="relevance-conclusion.html" title="Relevance Tuning Is the Last 10%">
<link rel="next" href="language-intro.html" title="Getting Started with Languages">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-node">Dealing with Human Language</span>
</div>
<div class="navheader">
<span class="prev">
<a href="relevance-conclusion.html">« Relevance Tuning Is the Last 10%</a>
</span>
<span class="next">
<a href="language-intro.html">Getting Started with Languages »</a>
</span>
</div>
<div class="part">
<div class="titlepage"><div><div>
<h1 class="title">
<a id="languages"></a>Dealing with Human Language<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/02_Dealing_with_language.asciidoc">edit</a>
</h1>
</div></div></div>
<div class="openblock partintro">
<div class="content">
<div class="blockquote">
<table border="0" class="blockquote" summary="Block quote">
<tr>
<td valign="top" width="10%"></td>
<td valign="top" width="80%">
<p>“I know all those words, but that sentence makes no sense to me.”</p>
</td>
<td valign="top" width="10%"></td>
</tr>
<tr>
<td valign="top" width="10%"></td>
<td align="right" colspan="2" valign="top">
-- <span class="attribution">Matt Groening</span>
</td>
</tr>
</table>
</div>
<p>Full-text search is a battle between <em>precision</em>—returning as few
irrelevant documents as possible—​and <em>recall</em>—returning as many relevant
documents as possible. While matching only the exact words that the user has
queried would be precise, it is not enough. We would miss out on many
documents that the user would consider to be relevant. Instead, we need to
spread the net wider, to also search for words that are not exactly the same
as the original but are related.</p>
<p>Wouldn’t you expect a search for “quick brown fox” to match a document
containing “fast brown foxes,” “Johnny Walker” to match “Johnnie
Walker,” or “Arnolt Schwarzenneger” to match “Arnold Schwarzenegger”?</p>
<p>If documents exist that <em>do</em> contain exactly what the user has queried,
those documents should appear at the top of the result set, but weaker matches
can be included further down the list.  If no documents match
exactly, at least we can show the user potential matches; they may even
be what the user originally intended!</p>
<p>There are several lines of attack:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
Remove diacritics like <code class="literal">´</code>, <code class="literal">^</code>, and <code class="literal">¨</code> so that a search for <code class="literal">rôle</code> will
also match <code class="literal">role</code>, and vice versa. See <a class="xref" href="token-normalization.html" title="Normalizing Tokens"><em>Normalizing Tokens</em></a>.
</li>
<li class="listitem">
Remove the distinction between singular and plural—<code class="literal">fox</code> versus <code class="literal">foxes</code>—or between tenses—<code class="literal">jumping</code> versus <code class="literal">jumped</code> versus <code class="literal">jumps</code>—by <em>stemming</em> each word to its root form. See <a class="xref" href="stemming.html" title="Reducing Words to Their Root Form"><em>Reducing Words to Their Root Form</em></a>.
</li>
<li class="listitem">
Remove commonly used words or <em>stopwords</em> like <code class="literal">the</code>, <code class="literal">and</code>, and <code class="literal">or</code>
to improve search performance.  See <a class="xref" href="stopwords.html" title="Stopwords: Performance Versus Precision"><em>Stopwords: Performance Versus Precision</em></a>.
</li>
<li class="listitem">
Including synonyms so that a query for <code class="literal">quick</code> could also match <code class="literal">fast</code>,
or <code class="literal">UK</code> could match <code class="literal">United Kingdom</code>. See <a class="xref" href="synonyms.html" title="Synonyms"><em>Synonyms</em></a>.
</li>
<li class="listitem">
Check for misspellings or alternate spellings, or match on <em>homophones</em>—words that sound the same, like <code class="literal">their</code> versus <code class="literal">there</code>, <code class="literal">meat</code> versus
<code class="literal">meet</code>  versus <code class="literal">mete</code>. See <a class="xref" href="fuzzy-matching.html" title="Typoes and Mispelings"><em>Typoes and Mispelings</em></a>.
</li>
</ul>
</div>
<p>Before we can manipulate individual words, we need to divide text into
words, which means that we need to know what constitutes a <em>word</em>. We will
tackle this in <a class="xref" href="identifying-words.html" title="Identifying Words"><em>Identifying Words</em></a>.</p>
<p>But first, let’s take a look at how to get started quickly and easily.</p>
</div>
</div>







</div>
<div class="navfooter">
<span class="prev">
<a href="relevance-conclusion.html">« Relevance Tuning Is the Last 10%</a>
</span>
<span class="next">
<a href="language-intro.html">Getting Started with Languages »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
